library(tidyverse)
dat <- readRDS(file = "dat.RDS")
`%nin%` = Negate(`%in%`)

# --------------------- Participant Selection ------------------------------

# participant selection
dat <- dat %>% 
  filter(iteration %in% -1:17) %>% # keepfirst cohort
  filter(part_status == 5) %>% # keep included subjects
  filter(startedSurvey == 1) # subjects started survey 

dat <- dat %>%
  mutate(stress = PROscorerTools::rerange100(stress, mn = 1, mx = 4)) %>%
  mutate(rate = rate*100)

dat$contactCat <-relevel(dat$contactCat, ref = "Email")

# --------------------- RQ1: Cortisol Participation over Time ------------------------------

dat_cortisolPart <- dat %>%
  filter(month %in% c(1,4,7,10,13)) %>%
  select("userId", "month","cortisolStatus") %>%
  mutate(cortisolStatusGeneral = ifelse(cortisolStatus %in% c("Not Interested in Cortisol Study",
                                                              "Excluded Because Closed Questionaire after Indicating Interest"),
                                        "Not Willing to Participate in HCC Study", as.character(cortisolStatus))) %>%
  mutate(cortisolStatusGeneral = factor(cortisolStatusGeneral, 
                                        levels = c("Hair Probe was Recieved",
                                                   "Collection Kit was sent out but no Hair was sent in",
                                                   "Eligable but No Hair Collection Kit was Send Out",
                                                   "Excluded due to Cortison Medication",
                                                   "Excluded due to Short Hair",
                                                   "Not Willing to Participate in HCC Study",
                                                   "Active, but Missed the Cortisol Questionaire",
                                                   "Active, but excluded due to no intrest/eligibilty in wave 1",
                                                   "Not Active"),
                                        labels = c("Sent in hair",
                                                   "HCC kit was sent out but no hair sample was sent in",
                                                   "Eligible but no HCC kit was sent out",
                                                   "Not eligible due to cortisone medication",
                                                   "Not eligible due to short hair",
                                                   "Not willing to participate in HCC study",
                                                   "Missed the HCC questionaire in this wave",
                                                   "Excluded from HCC after not being willing or eligible in wave 1",
                                                   "Did not respond to any survey items in this wave")))

HHC_behavior_wide <- dat_cortisolPart %>%
  select("userId", "month","cortisolStatusGeneral") %>%
  pivot_wider(id_cols = userId, names_from = month, names_prefix = "M" ,values_from = "cortisolStatusGeneral") %>%
  select(-userId)

HCC_table <- sapply(HHC_behavior_wide, table) 

HCC_table_df <- HCC_table %>%
  as.data.frame()

noResponseHCCOutcomes <- c("Missed the HCC questionaire in this wave",
                        "Excluded from HCC after not being willing or eligible in wave 1")

table1_noResponse <- HCC_table_df %>%
  filter(rownames(.) %nin% c("Did not respond to any survey items in this wave")) %>%
  rbind(TotalActive = colSums(.)) %>%
  rownames_to_column(var = "HCC_outcome") %>%
  pivot_longer(cols = M1:M13) %>%
  pivot_wider(name, names_from = HCC_outcome) %>%
  mutate(missedHHCQ_perc = round(`Missed the HCC questionaire in this wave`/TotalActive, 3)*100,
         `Missed the HCC questionaire in this wave` = paste0(missedHHCQ_perc, "% (N = ", `Not willing to participate in HCC study`, ")"),
         excludedMissed_perc = round(`Excluded from HCC after not being willing or eligible in wave 1`/TotalActive, 3)*100,
         `Excluded from HCC after not being willing or eligible in wave 1` = paste0(excludedMissed_perc, "% (N = ", `Excluded from HCC after not being willing or eligible in wave 1`, ")")) %>%
  mutate(TotalActive = as.character(TotalActive)) %>%
  select(-missedHHCQ_perc) %>%
  pivot_longer(cols = c(`Missed the HCC questionaire in this wave`, `Excluded from HCC after not being willing or eligible in wave 1`,"TotalActive"), names_to = "HCC_outcome") %>%
  pivot_wider(id_cols = HCC_outcome, names_from = name)

notwillingHCCOutcomes <- c("Sent in hair",
                        "HCC kit was sent out but no hair sample was sent in",
                        "Eligible but no HCC kit was sent out",
                        "Not eligible due to cortisone medication",
                        "Not eligible due to short hair",
                        "Not willing to participate in HCC study")
table1_notWilling <- HCC_table_df %>%
  rbind(SubmittedTotal = colSums(HCC_table_df[notwillingHCCOutcomes,])) %>%
  filter(rownames(.) %in% c("SubmittedTotal",
                            "Not willing to participate in HCC study")) %>%
  rownames_to_column(var = "HCC_outcome") %>%
  pivot_longer(cols = M1:M13) %>%
  pivot_wider(name, names_from = HCC_outcome) %>%
  mutate(notWilling_perc = round(`Not willing to participate in HCC study`/SubmittedTotal, 3)*100,
         `Not willing to participate in HCC study` = paste0(notWilling_perc, "% (N = ", `Not willing to participate in HCC study`, ")")) %>%
  mutate(SubmittedTotal = as.character(SubmittedTotal)) %>%
  select(-notWilling_perc) %>%
  pivot_longer(cols = c(`Not willing to participate in HCC study`, "SubmittedTotal"), names_to = "HCC_outcome") %>%
  pivot_wider(id_cols = HCC_outcome, names_from = name)

willingHCCOutcomes <- c("Sent in hair",
                        "HCC kit was sent out but no hair sample was sent in",
                        "Eligible but no HCC kit was sent out",
                        "Not eligible due to cortisone medication",
                        "Not eligible due to short hair")

table1_willing <- HCC_table_df %>%
  rbind(willingTotal = colSums(HCC_table_df[willingHCCOutcomes,])) %>%
  filter(rownames(.) %in% c("Not eligible due to cortisone medication",
                            "Not eligible due to short hair", 
                            "willingTotal")) %>%
  rownames_to_column(var = "HCC_outcome") %>%
  pivot_longer(cols = M1:M13) %>%
  pivot_wider(name, names_from = HCC_outcome) %>%
  mutate(drugs_perc = round(`Not eligible due to cortisone medication`/willingTotal, 3)*100,
         `Not eligible due to cortisone medication` = paste0(drugs_perc, "% (N = ", `Not eligible due to cortisone medication`, ")"),
         hair_perc = round(`Not eligible due to short hair`/willingTotal, 3)*100,
         `Not eligible due to short hair` = paste0(hair_perc, "% (N = ", `Not eligible due to short hair`, ")")) %>%
  mutate(willingTotal = as.character(willingTotal)) %>%
  select(-c("drugs_perc", "hair_perc")) %>%
  pivot_longer(cols = c(`Not eligible due to cortisone medication`,`Not eligible due to short hair`, "willingTotal"), names_to = "HCC_outcome") %>%
  pivot_wider(id_cols = HCC_outcome, names_from = name)

eligibleHCCOutcomes <- c("Sent in hair",
                         "HCC kit was sent out but no hair sample was sent in",
                         "Eligible but no HCC kit was sent out")

table1_eligible <- HCC_table_df %>%
  rbind(eligibleTotal = colSums(HCC_table_df[eligibleHCCOutcomes,])) %>%
  filter(rownames(.) %in% c("Eligible but no HCC kit was sent out","eligibleTotal")) %>%
  rownames_to_column(var = "HCC_outcome") %>%
  pivot_longer(cols = M1:M13) %>%
  pivot_wider(name, names_from = HCC_outcome) %>%
  mutate(noKit_perc = round(`Eligible but no HCC kit was sent out`/eligibleTotal, 3)*100,
         `Eligible but no HCC kit was sent out` = paste0(noKit_perc, "% (N = ", `Eligible but no HCC kit was sent out`, ")")) %>%
  mutate(eligibleTotal = as.character(eligibleTotal)) %>%
  select(-c("noKit_perc")) %>%
  pivot_longer(cols = c(`Eligible but no HCC kit was sent out`,"eligibleTotal"), names_to = "HCC_outcome") %>%
  pivot_wider(id_cols = HCC_outcome, names_from = name)

sentKitHCCOutcomes <- c("Sent in hair",
                        "HCC kit was sent out but no hair sample was sent in")

table1_sentKits <- HCC_table_df %>%
  rbind(sentKitsTotal = colSums(HCC_table_df[sentKitHCCOutcomes,])) %>%
  filter(rownames(.) %in% c(sentKitHCCOutcomes,"sentKitsTotal")) %>%
  rownames_to_column(var = "HCC_outcome") %>%
  pivot_longer(cols = M1:M13) %>%
  pivot_wider(name, names_from = HCC_outcome) %>%
  mutate(sentInHair_perc = round(`Sent in hair`/sentKitsTotal, 3)*100,
         `Sent in hair` = paste0(sentInHair_perc, "% (N = ", `Sent in hair`, ")"),
         didNotSendIn_perc = round(`HCC kit was sent out but no hair sample was sent in`/sentKitsTotal, 3)*100,
         `HCC kit was sent out but no hair sample was sent in` = paste0(didNotSendIn_perc, "% (N = ", `HCC kit was sent out but no hair sample was sent in`, ")")) %>%
  mutate(sentKitsTotal = as.character(sentKitsTotal)) %>%
  select(-c("sentInHair_perc", "didNotSendIn_perc")) %>%
  pivot_longer(cols = c(`Sent in hair`,`HCC kit was sent out but no hair sample was sent in`, "sentKitsTotal"), names_to = "HCC_outcome") %>%
  pivot_wider(id_cols = HCC_outcome, names_from = name)

table1 <- rbind(table1_noResponse, table1_notWilling, table1_willing, table1_eligible, table1_sentKits) %>%
  filter(HCC_outcome != "HCC kit was sent out but no hair sample was sent in") %>%
  mutate(HCC_outcome = factor(HCC_outcome, 
                              levels = c("TotalActive", 
                                         "Missed the HCC questionaire in this wave",
                                         "Excluded from HCC after not being willing or eligible in wave 1",
                                         "SubmittedTotal",
                                         "Not willing to participate in HCC study",
                                         "willingTotal",
                                         "Not eligible due to cortisone medication",
                                         "Not eligible due to short hair",
                                         "eligibleTotal",
                                         "Eligible but no HCC kit was sent out",
                                         "sentKitsTotal",
                                         "HCC kit was sent out but no hair sample was sent in",
                                         "Sent in hair"),
                              labels = c("Active GJSP respondents", 
                                         "Did not respond to HCC questionnaire",
                                         "Excluded because not willing or eligible in wave 1",
                                         "Completed HCC questionnaire", 
                                         "Not willing to participate in HCC study",
                                         "Willing to participate in HCC study",
                                         "Not eligible due to cortisone-based medication",
                                         "Not eligible due to short hair",
                                         "Eligible for HCC collection",
                                         "Invalid mailing address/missed previous HCC collection wave",
                                         "Hair collection kit was mailed out",
                                         "Hair sample was not sent in",
                                         "Hair sample was sent in"))) %>%
  arrange(HCC_outcome)

sjPlot::tab_df(table1, file="../Tables and Figures/R-Exports/Tables/Table1.doc")


# plot cortisol status over time
FigureS2 <- dat_cortisolPart %>%
  select("userId", "month","cortisolStatusGeneral") %>%
  pivot_wider(id_cols = userId, names_from = month, names_prefix = "cortisolStatus_M" ,values_from = "cortisolStatusGeneral") %>%
  arrange(desc(cortisolStatus_M1), desc(cortisolStatus_M4), desc(cortisolStatus_M7), desc(cortisolStatus_M10), desc(cortisolStatus_M13)) %>%
  mutate(sortedID = row_number()) %>%
  pivot_longer(cols = starts_with("cortisolStatus_"), names_to = "mzp", names_prefix = "cortisolStatus_", values_to = "cortisolStatus") %>%
  mutate(mzp = factor(mzp, levels = c("M1", "M4", "M7", "M10", "M13"))) %>%
  ggplot(aes(x = mzp,
           y = sortedID,
           fill = cortisolStatus)) +
  geom_tile() +
  theme_classic()+
  theme(legend.position="bottom",
        legend.title = element_blank(),
        text = element_text(size = 15))+
  scale_fill_manual(values = c(ggsci::pal_npg("nrc")(6)[1:6], "gray40", "gray90", "white"),
                    na.value = "black")+
  guides(fill=guide_legend(nrow=3,byrow=TRUE)) +
  xlab("Survey Wave") +
  ylab("Participant") +
  scale_x_discrete(expand = c(0, 0))
  

ggsave(filename = "../Tables and Figures/Supplement/FigureS2.tiff", plot = FigureS2, dpi = 300,
       height = 20, width = 40, units = "cm")


# --------------------- RQ2: Cortisol Participation in Wave 1 ------------------------------
dat <- dat %>%
  mutate(stress_c = scale(stress, center = TRUE, scale = FALSE),
         neuro_c = scale(neuro, center = TRUE, scale = FALSE),
         open_c = scale(open, center = TRUE, scale = FALSE),
         extra_c = scale(extra, center = TRUE, scale = FALSE),
         agree_c = scale(agree, center = TRUE, scale = FALSE),
         consc_c = scale(consc, center = TRUE, scale = FALSE),
         age_c = scale(age, center = TRUE, scale = FALSE),
         rate_c = scale(rate, center = TRUE, scale = FALSE))

pred_hypo <- c("age_c", "male", "otherGender", "college","stress_c")
pred_hypo_names <- c("Age", "Male (ref.: female)", "Other gender (ref.: female)", "Tertiary degree (ref.: no college degree)", "Perceived stress (POMP score)")

pred_big5 <- c("neuro_c", "open_c", "extra_c", "agree_c", "consc_c")
pred_big5_names <-c("Neuroticism", "Openess", "Extraversion", "Agreeableness", "Conscientiousness")

pred_time <- c("workh_cat", "longHours",
               "newJob", "loseJob", "changeJob", "changeSelfEmp", "changeParttime","endEMP")
pred_time_names <- c("Marginal employment (ref.: full-time employment)", 
                        "Part-time employment (ref.: full-time employment)", 
                        "Flexible working hours (ref.: full-time employment)",
                        "Long hours",
                     "Expectation of more than 50% to look for a new position within six months (ref.: less than 50%)", 
                      "Expectation of more than 50% to actually lose job within six months (ref.: less than 50%)", 
                      "Expectation of more than 50% to give up their current profession and start another one within six months (ref.: less than 50%)",
                      "Expectation of more than 50% to become self-employed within six months (ref.: less than 50%)", 
                      "Expectation of more than 50% to substantially change their working hours within six months (ref.: less than 50%)", 
                      "Expectation of more than 50% to give up working entirely within six months (ref.: less than 50%)")

pred_studySpecifics <- c("contactCat","sample")
pred_studySpecifics_names <- c("Letter from July 2018 to May 2019, no email address provided (ref.: email)",
                               "Letter from Dec. 2017 to June 2018 (ref.: email)", 
                               "Preannouncement (ref.: email)", 
                               "Letter from Nov 2018 to May 2019, email address provided (ref.: email)",
                               "Mass-layoff sample (ref.: not Mass-layoff sample)")

pred_all <- c(pred_hypo, pred_big5, pred_time, "rate_c", pred_studySpecifics)
pred_all_names <- c(pred_hypo_names, pred_big5_names, pred_time_names, "% of answered items in M1", pred_studySpecifics_names)

# define cortisol participation outcomes
dat <- dat %>%
  mutate(willingness = ifelse(cortisolStatus %in% c("Hair Probe was Recieved",
                                                    "Collection Kit was sent out but no Hair was sent in",
                                                    "Eligable but No Hair Collection Kit was Send Out",
                                                    "Excluded due to Cortison Medication",
                                                    "Excluded due to Short Hair",
                                                    "Closed Questionaire after Indicating Interest"), 1, 
                              ifelse(cortisolStatus == "Not Interested in Cortisol Study", 0, NA)),
         eligiblility= ifelse(cortisolStatus %in% c("Hair Probe was Recieved",
                                                                       "Collection Kit was sent out but no Hair was sent in",
                                                                       "Eligable but No Hair Collection Kit was Send Out"), 1, 
                                                 ifelse(cortisolStatus %in% c("Excluded due to Cortison Medication",
                                                                              "Excluded due to Short Hair"),0, NA)),
         sendInHair = ifelse(cortisolStatus == "Hair Probe was Recieved", 1,
                             ifelse(cortisolStatus %in% c("Collection Kit was sent out but no Hair was sent in"), 0, NA)),
         gender = factor(gender, levels = 1:3, labels = c("female", "male", "other gender")))


## Table 1: Descriptive Table
covs_tab1 <- c("age", "gender", "college", "married", "hhInc_linear",
               "stress", pred_big5, pred_time, "rate", pred_studySpecifics)
factorCovs_tab1 <-c("gender", "college", "married", pred_time, "contactCat", "sample")
tab_all <- tableone::CreateTableOne(vars = covs_tab1, 
                                    data = filter(dat, month == 1), 
                                    factorVars = factorCovs_tab1)
tab_allCortisol <- tableone::CreateTableOne(vars = covs_tab1, data = filter(dat, month == 1 & cortisolStatus %nin% c("Active, but Missed the Cortisol Questionaire",
                                                                                                                    "Not Active")), 
                                            factorVars = factorCovs_tab1)
tab_willingness <- tableone::CreateTableOne(vars = covs_tab1, data = filter(dat, month == 1 & willingness == 1), 
                                            factorVars = factorCovs_tab1)
tab_eligibility <- tableone::CreateTableOne(vars = covs_tab1, data = filter(dat, month == 1 & eligiblility == 1), 
                                            factorVars = factorCovs_tab1)
tab_sendInHair <- tableone::CreateTableOne(vars = covs_tab1, data = filter(dat, month == 1 & sendInHair == 1), 
                                           factorVars = factorCovs_tab1)


tab_export_all <- print(tab_all, exact = "stage", quote = FALSE, noSpaces = TRUE, printToggle = FALSE)
tab_export_allCortisol <- print(tab_allCortisol, exact = "stage", quote = FALSE, noSpaces = TRUE, printToggle = FALSE)
tab_export_willingness <- print(tab_willingness, exact = "stage", quote = FALSE, noSpaces = TRUE, printToggle = FALSE)
tab_export_eligibility <- print(tab_eligibility, exact = "stage", quote = FALSE, noSpaces = TRUE, printToggle = FALSE)
tab_export_sendInHair <- print(tab_sendInHair, exact = "stage", quote = FALSE, noSpaces = TRUE, printToggle = FALSE)

## Save to a CSV file
write.csv(tab_export_all, file = "../Tables and Figures/R-Exports/Table2_all.csv")
write.csv(tab_export_allCortisol, file = "../Tables and Figures/R-Exports/Table2_allCortisol.csv")
write.csv(tab_export_willingness, file = "../Tables and Figures/R-Exports/Table2_willingness.csv")
write.csv(tab_export_eligibility, file = "../Tables and Figures/R-Exports/Table2_eligibility.csv")
write.csv(tab_export_sendInHair, file = "../Tables and Figures/R-Exports/Table2_sendInHair.csv")

## Run Logistic Regression Models

# outcome 1: Willingness
covs1 <- c(pred_hypo, "rate_c", pred_studySpecifics)
covs2 <- c(pred_big5, "rate_c", pred_studySpecifics)
covs3 <- c(pred_time, "rate_c", pred_studySpecifics)
covs4 <- c(pred_hypo, pred_big5, pred_time, "rate_c", pred_studySpecifics)

formula_willingness_hypo    <- as.formula(paste0("willingness ~ ",paste(covs1,collapse = " + ")))
formula_willingness_big5    <- as.formula(paste0("willingness ~ ",paste(covs2,collapse = " + ")))
formula_willingness_time    <- as.formula(paste0("willingness ~ ",paste(covs3,collapse = " + ")))
formula_willingness_overall <- as.formula(paste0("willingness ~ ",paste(covs4,collapse = " + ")))

dat_regWilling <- dat %>%
  filter(month == 1) %>%
  select("willingness", all_of(covs4)) %>%
  na.omit()

out_willingness_hypo <- glm(formula_willingness_hypo, data = dat_regWilling, family = "binomial")
out_willingness_big5 <- glm(formula_willingness_big5, data = dat_regWilling, family = "binomial")
out_willingness_time <- glm(formula_willingness_time, data = dat_regWilling, family = "binomial")
out_willingness_overall <- glm(formula_willingness_overall, data = dat_regWilling, family = "binomial")

runBivariateRegWilling <- function(outcome, cov){
  fit <- glm(as.formula(paste0(outcome, " ~ ", cov)), data = dat_regWilling, family = "binomial")
  est <- summary(fit)$coefficients[,1]
  se <- summary(fit)$coefficients[,2]
  z <- summary(fit)$coefficients[,3]
  pval <- summary(fit)$coefficients[,4]
  out <- data.frame(cbind(est, se, z, pval))
  out <- rownames_to_column(out, "param") %>%
    filter(param != "(Intercept)")
  return(out)
}

bivariateRegWilling_List <- lapply(covs4, runBivariateRegWilling, outcome = "willingness")
bivariateRegWilling <- bind_rows(bivariateRegWilling_List)

bivariateRegWillingClean <- bivariateRegWilling %>%
  mutate_at(vars("est", "se", "z", "pval"), 
            funs(round(., 3))) %>%
  mutate(param = str_replace(param, "age_c", pred_all_names[1]),
         param = str_replace(param, "male", pred_all_names[2]),
         param = str_replace(param, "otherGender", pred_all_names[3]),
         param = str_replace(param, "college", pred_all_names[4]),
         param = str_replace(param, "stress_c", pred_all_names[5]),
         param = str_replace(param, "neuro_c", pred_all_names[6]),
         param = str_replace(param, "open_c", pred_all_names[7]),
         param = str_replace(param, "extra_c", pred_all_names[8]),
         param = str_replace(param, "agree_c", pred_all_names[9]),
         param = str_replace(param, "consc_c", pred_all_names[10]),
         param = str_replace(param, "workh_catmarginal employment", pred_all_names[11]),
         param = str_replace(param, "workh_catpart-time employment", pred_all_names[12]),
         param = str_replace(param, "workh_catflexible working hours", pred_all_names[13]),
         param = str_replace(param, "longHours", pred_all_names[14]),
         param = str_replace(param, "newJob", pred_all_names[15]),
         param = str_replace(param, "loseJob", pred_all_names[16]),
         param = str_replace(param, "changeJob", pred_all_names[17]),
         param = str_replace(param, "changeSelfEmp", pred_all_names[18]),
         param = str_replace(param, "changeParttime", pred_all_names[19]),
         param = str_replace(param, "endEMP", pred_all_names[20]),
         param = str_replace(param, "rate", pred_all_names[21]),
         param = str_replace(param, "contactCatLetter (no email available)", pred_all_names[22]),
         param = str_replace(param, "contactCatEarly Recruitment Waves (Letter)", pred_all_names[23]),
         param = str_replace(param, "contactCatPre-Announcement (email available)", pred_all_names[24]),
         param = str_replace(param, "contactCatLetter(email available)", pred_all_names[25]),
         param = str_replace(param, "sample", pred_all_names[26]))

bivariateRegWillingClean_export <- bivariateRegWillingClean %>%
  mutate(est = ifelse(pval < 0.01, paste0(est, "**"), est),
         est = ifelse(pval < 0.05, paste0(est, "*"), as.character(est)),
         se = paste0("(", as.character(se), ")")) %>%
  select(param, est, se) %>%
  pivot_longer(cols = c(est, se),
               names_to = "type",
               values_to = "value")

sjPlot::tab_df(bivariateRegWillingClean_export, 
               file = "../Tables and Figures/R-Exports/Tables/biVarReg_Willing.doc", 
               digits = 2)

# Export Regression Results to a Word table
model_names <- c("Variables with Hypothesis", 
                 "Personality",
                 "Time Availability",
                 "Overall")

stargazer::stargazer(out_willingness_hypo, out_willingness_big5,out_willingness_time, out_willingness_overall, 
                     column.labels = model_names,
                     covariate.labels = pred_all_names,
                     #notes        = ".", 
                     #notes.append = FALSE,
                     star.char = c("", "*", "**"),
                     omit.stat = c("ll", "aic"),
                     type = "text"#,
                     #out=paste0(dir,"/2 Publikationen/Biomarker Study/Tables and Figures/R-Exports/Table2.doc")
)

# Model 2: sendInHair
runBivariateRegsendInHair <- function(outcome, cov){
  fit <- glm(as.formula(paste0(outcome, " ~ ", cov)), data = dat_regsendInHair, family = "binomial")
  est <- summary(fit)$coefficients[,1]
  se <- summary(fit)$coefficients[,2]
  z <- summary(fit)$coefficients[,3]
  pval <- summary(fit)$coefficients[,4]
  out <- data.frame(cbind(est, se, z, pval))
  out <- rownames_to_column(out, "param") %>%
    filter(param != "(Intercept)")
  return(out)
}

bivariateRegsendInHair_List <- lapply(covs4, runBivariateRegsendInHair, outcome = "sendInHair")
bivariateRegsendInHair <- bind_rows(bivariateRegsendInHair_List)

bivariateRegsendInHairClean <- bivariateRegsendInHair %>%
  mutate_at(vars("est", "se", "z", "pval"), 
            funs(round(., 3))) %>%
  mutate(param = str_replace(param, "age_c", pred_all_names[1]),
         param = str_replace(param, "male", pred_all_names[2]),
         param = str_replace(param, "otherGender", pred_all_names[3]),
         param = str_replace(param, "college", pred_all_names[4]),
         param = str_replace(param, "stress_c", pred_all_names[5]),
         param = str_replace(param, "neuro_c", pred_all_names[6]),
         param = str_replace(param, "open_c", pred_all_names[7]),
         param = str_replace(param, "extra_c", pred_all_names[8]),
         param = str_replace(param, "agree_c", pred_all_names[9]),
         param = str_replace(param, "consc_c", pred_all_names[10]),
         param = str_replace(param, "workh_catmarginal employment", pred_all_names[11]),
         param = str_replace(param, "workh_catpart-time employment", pred_all_names[12]),
         param = str_replace(param, "workh_catflexible working hours", pred_all_names[13]),
         param = str_replace(param, "longHours", pred_all_names[14]),
         param = str_replace(param, "newJob", pred_all_names[15]),
         param = str_replace(param, "loseJob", pred_all_names[16]),
         param = str_replace(param, "changeJob", pred_all_names[17]),
         param = str_replace(param, "changeSelfEmp", pred_all_names[18]),
         param = str_replace(param, "changeParttime", pred_all_names[19]),
         param = str_replace(param, "endEMP", pred_all_names[20]),
         param = str_replace(param, "rate", pred_all_names[21]),
         param = str_replace(param, "contactCatLetter (no email available)", pred_all_names[22]),
         param = str_replace(param, "contactCatEarly Recruitment Waves (Letter)", pred_all_names[23]),
         param = str_replace(param, "contactCatPre-Announcement (email available)", pred_all_names[24]),
         param = str_replace(param, "contactCatLetter(email available)", pred_all_names[25]),
         param = str_replace(param, "sample", pred_all_names[26]))

bivariateRegsendInHair_export <- bivariateRegsendInHairClean %>%
  mutate(est = ifelse(pval < 0.01, paste0(est, "**"), est),
         est = ifelse(pval < 0.05, paste0(est, "*"), as.character(est)),
         se = paste0("(", as.character(se), ")")) %>%
  select(param, est, se) %>%
  pivot_longer(cols = c(est, se),
               names_to = "type",
               values_to = "value")

sjPlot::tab_df(bivariateRegsendInHair_export, 
               file = "../Tables and Figures/R-Exports/Tables/biVarReg_sendIn.doc", 
               digits = 2)

formula_sendInHair_hypo     <- as.formula(paste0("sendInHair ~ ",paste(covs1,collapse = " + ")))
formula_sendInHair_big5     <- as.formula(paste0("sendInHair ~ ",paste(covs2,collapse = " + ")))
formula_sendInHair_time     <- as.formula(paste0("sendInHair ~ ",paste(covs3,collapse = " + ")))
formula_sendInHair_overall  <- as.formula(paste0("sendInHair ~ ",paste(covs4,collapse = " + ")))

dat_regsendInHair <- dat %>%
  filter(month == 1) %>%
  select("sendInHair", all_of(covs4)) %>%
  na.omit()

out_sendInHair_hypo <- glm(formula_sendInHair_hypo, data = dat_regsendInHair, family = "binomial")
out_sendInHair_big5 <- glm(formula_sendInHair_big5, data = dat_regsendInHair, family = "binomial")#
out_sendInHair_time <- glm(formula_sendInHair_time, data = dat_regsendInHair, family = "binomial")
out_sendInHair_overall <- glm(formula_sendInHair_overall, data = dat_regsendInHair, family = "binomial")

stargazer::stargazer(out_sendInHair_hypo, out_sendInHair_big5, out_sendInHair_time,out_sendInHair_overall,
                     column.labels = model_names,
                     covariate.labels = pred_all_names,
                     #notes        = ".", 
                     #notes.append = FALSE,
                     star.char = c("", "*", "**"),
                     omit.stat = c("ll", "aic"),
                     type = "text"#,
                     #out=paste0(dir,"/2 Publikationen/Biomarker Study/Tables and Figures/R-Exports/Table2.doc")
)

stargazer::stargazer(out_willingness_hypo, out_willingness_big5,out_willingness_time, out_willingness_overall,
                     out_sendInHair_hypo, out_sendInHair_big5, out_sendInHair_time,out_sendInHair_overall,
                     #column.labels = c(model_names,model_names),
                     column.labels = rep(paste0("M", 1:4),2),
                     covariate.labels = pred_all_names,
                     notes        = "M1: model including variables with a clear hypothesis; M2: model including personality variables; M3: model with employment-related variables; M4: overall model; models M1 - M4 all contain the study- specific control variables", 
                     #notes.append = FALSE,
                     star.char = c("", "*", "**"),
                     omit.stat = c("ll", "aic"),
                     type = "html",
                     out="../Tables and Figures/R-Exports/Tables/Table3.doc")



# compute Odds-Ratios
exp(10*out_willingness_overall$coefficients["age_c"]) 
exp(25*out_willingness_overall$coefficients["stress_c"]) 

# convert logits to probability
logit2prob <- function(logit){
  odds <- exp(logit)
  prob <- odds / (1 + odds)
  return(prob)
}

intercept <- out_willingness_overall$coefficients["(Intercept)"]
estimate_age <- out_willingness_overall$coefficients["age_c"]
estimate_stress <- out_willingness_overall$coefficients["stress_c"]

logit2prob(intercept+10*estimate_age) - logit2prob(intercept) 
logit2prob(intercept+25*estimate_stress) - logit2prob(intercept) 

# --------------------- RQ3: Cortisol Participation as a Predictor of Long-term Panel Participation ------------------------------

# generate "active in month X" variables
dat_rq3 <- dat %>%
  group_by(userId) %>%
  mutate(strongPart_M1 = ifelse(month == 1 & rate > 0, 1, 0),
         strongPart_M1 = max(strongPart_M1),
         activeM13 = ifelse(month >= 13 & rate > 0, 1, 0),
         activeM13 = max(activeM13),
         activeM25 = ifelse(month >= 25 & rate > 0, 1, 0),
         activeM25 = max(activeM25)) %>%
  ungroup()

dat_rq3 <- dat_rq3 %>%
  filter(month == 1 & !is.na(cortisol_consent))

dat_rq3 <- dat_rq3 %>%
  mutate(notWilling = ifelse(cortisolStatus %in% c("Not Interested in Cortisol Study",
                                                   "Excluded Because Closed Questionaire after Indicating Interest"), 1, 0),
         willingness_all = ifelse(cortisolStatus %in% c("Collection Kit was sent out but no Hair was sent in",
                                                    "Eligable but No Hair Collection Kit was Send Out",
                                                    "Excluded due to Cortison Medication",
                                                    "Excluded due to Short Hair"), 1, 0),
         willingness_notEligible = ifelse(cortisolStatus %in% c("Excluded due to Cortison Medication",
                                                        "Excluded due to Short Hair"), 1, 0),
         willingness_Eligible_noKit = ifelse(cortisolStatus == "Eligable but No Hair Collection Kit was Send Out", 1, 0),
         willingness_Eligible_Kit = ifelse(cortisolStatus == "Collection Kit was sent out but no Hair was sent in", 1, 0),
         sendinHair = ifelse(cortisolStatus == "Hair Probe was Recieved", 1, 0))
# run model for month 1
formula_activeM13 <- as.formula(paste0("activeM13 ~",paste(c("willingness_notEligible + willingness_Eligible_noKit + willingness_Eligible_Kit + sendinHair + rate_c",pred_studySpecifics),collapse = " + ")))
formula_activeM13_all <- as.formula(paste0("activeM13 ~",paste(c("willingness_notEligible + willingness_Eligible_noKit + willingness_Eligible_Kit + sendinHair + rate_c",c(pred_studySpecifics, pred_big5)),collapse = " + ")))
formula_activeM25 <- as.formula(paste0("activeM25 ~",paste(c("willingness_notEligible + willingness_Eligible_noKit + willingness_Eligible_Kit + sendinHair + rate_c",pred_studySpecifics),collapse = " + ")))
formula_activeM25_all <- as.formula(paste0("activeM25 ~",paste(c("willingness_notEligible + willingness_Eligible_noKit + willingness_Eligible_Kit + sendinHair + rate_c",c(pred_studySpecifics, pred_big5)),collapse = " + ")))

out_activeM13 <- glm(formula_activeM13, data = dat_rq3, family = "binomial")
out_activeM13_all <- glm(formula_activeM13_all, data = dat_rq3, family = "binomial")
out_activeM25 <- glm(formula_activeM25, data = dat_rq3, family = "binomial")
out_activeM25_all <- glm(formula_activeM25_all, data = dat_rq3, family = "binomial")

names_rq3 <-c("Willing but not eligible (ref.: not willing to participate in HCC)",
              "Willing and eligible but no HCC kit was sent out to subject (ref.: not willing to participate in HCC)",
              "Willing and eligible and HCC kit was sent out to subject but no hair sample was sent back in (ref.: not willing to participate in HCC)",
              "Sent in hair sample (ref.: not willing to participate in HCC)",
              "% of answered items in M1",
              pred_studySpecifics_names)

stargazer::stargazer(out_activeM13, out_activeM25,
                     column.labels = rep(paste0("M", 1:2),2),
                     covariate.labels = names_rq3,
                     #notes.append = FALSE,
                     star.char = c("", "*", "**"),
                     omit.stat = c("ll", "aic"),
                     type = "html",
                     out="../Tables and Figures/R-Exports/Tables/Table4.doc"
                     )


# compute Odds-Ratios
exp(out_activeM13$coefficients["sendinHair"]) 
exp(out_activeM25$coefficients["sendinHair"]) 

# convert logits to probability

intercept_activeM13 <- out_activeM13$coefficients["(Intercept)"]
estimate_activeM13_sendIn <- out_activeM13$coefficients["sendinHair"]
logit2prob(intercept_activeM13+estimate_activeM13_sendIn) - logit2prob(intercept_activeM13) 

intercept_activeM25 <- out_activeM25$coefficients["(Intercept)"]
estimate_activeM25_sendIn <- out_activeM25$coefficients["sendinHair"]
logit2prob(intercept_activeM25+estimate_activeM25_sendIn) - logit2prob(intercept_activeM25) 


# run models again but with sent in hair as reference
# run model for month 1
formula_activeM13_sendInRef <- as.formula(paste0("activeM13 ~",paste(c("willingness_notEligible + willingness_Eligible_noKit + willingness_Eligible_Kit + notWilling + rate_c",pred_studySpecifics),collapse = " + ")))
formula_activeM25_sendInRef <- as.formula(paste0("activeM25 ~",paste(c("willingness_notEligible + willingness_Eligible_noKit + willingness_Eligible_Kit + notWilling + rate_c",pred_studySpecifics),collapse = " + ")))

out_activeM13_sendInRef <- glm(formula_activeM13_sendInRef, data = dat_rq3, family = "binomial")
out_activeM25_sendInRef <- glm(formula_activeM25_sendInRef, data = dat_rq3, family = "binomial")
summary(out_activeM13_sendInRef)
summary(out_activeM25_sendInRef)
